{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Building prefix dict from the default dictionary ...\n",
      "Dumping model to file cache C:\\Users\\ADMINI~1\\AppData\\Local\\Temp\\jieba.cache\n",
      "Loading model cost 0.799 seconds.\n",
      "Prefix dict has been built successfully.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "'周瑜'的词向量为：\n",
      " [-0.4130814   0.40998384  0.25047174  0.73942614 -0.24401875  0.60114616\n",
      " -0.20168497  0.37192357 -1.2244401   0.1999061   0.28947642 -0.9191322\n",
      " -0.22305013 -0.4266497   0.14982763  0.19327226 -0.30716994 -0.2752266\n",
      " -0.3359485  -1.3712827 ]\n",
      "与'周瑜'相似度最高的10个词：\n",
      "[('孙策', 0.9292185306549072), ('钟会', 0.9210042357444763), ('陆逊', 0.9137358069419861), ('孟获', 0.9075760245323181), ('夏侯楙', 0.9048130512237549), ('孙权', 0.9013352990150452), ('吕布', 0.9009506702423096), ('曹操', 0.8988862037658691), ('鲁肃', 0.8962713479995728), ('孔明', 0.8947500586509705)]\n",
      "'刘备'和'曹操'的相似度：0.8122937083244324\n",
      "在词'孙权/曹操/刘备/孙夫人'中，'刘备'与其他词不属于同一类\n"
     ]
    }
   ],
   "source": [
    "import re\n",
    "\n",
    "import jieba\n",
    "from gensim.models import Word2Vec\n",
    "\n",
    "# Characters removed from every token: whitespace plus ASCII and full-width punctuation.\n",
    "# The earlier pattern listed the closing double quote twice and had no opening double\n",
    "# quote, closing single quote or ASCII ')', so those marks survived as tokens; each\n",
    "# quote/bracket pair is now complete. A raw string avoids invalid-escape warnings, and\n",
    "# compiling once keeps the pattern out of the per-token loop.\n",
    "PUNCT_RE = re.compile(r\"[\\s+.!/_,$%^*()\\\"'“”‘’《》—！，。？、~@#￥…&（）：；]+\")\n",
    "\n",
    "# Fixed seed so repeated runs train the same embeddings.\n",
    "SEED = 42\n",
    "\n",
    "\n",
    "def tokenize_line(line):\n",
    "    \"\"\"Segment one line with jieba and return its tokens with punctuation stripped.\n",
    "\n",
    "    Tokens that are empty after stripping are dropped.\n",
    "    \"\"\"\n",
    "    stripped = (PUNCT_RE.sub(\"\", token) for token in jieba.lcut(line))\n",
    "    return [token for token in stripped if token]\n",
    "\n",
    "\n",
    "with open(\"sanguo.txt\", encoding=\"utf-8\") as f:\n",
    "    # One token list per input line; lines with no remaining tokens are skipped.\n",
    "    lines = [tokens for tokens in map(tokenize_line, f) if tokens]\n",
    "\n",
    "# gensim documents that a fully reproducible run needs workers=1 in addition to seed\n",
    "# (and a fixed PYTHONHASHSEED across interpreter launches).\n",
    "model = Word2Vec(\n",
    "    lines, vector_size=20, window=2, min_count=3, epochs=7, negative=10, sg=1,\n",
    "    seed=SEED, workers=1,\n",
    ")\n",
    "\n",
    "print(\"'周瑜'的词向量为：\\n\", model.wv.get_vector('周瑜'))\n",
    "print(\"与'周瑜'相似度最高的10个词：\")\n",
    "print(model.wv.most_similar('周瑜', topn=10))\n",
    "print(\"'刘备'和'曹操'的相似度：{}\".format(model.wv.similarity('刘备', '曹操')))\n",
    "words = \"孙权 曹操 刘备 孙夫人\"\n",
    "print(\"在词'孙权/曹操/刘备/孙夫人'中，'{}'与其他词不属于同一类\".format(model.wv.doesnt_match(words.split())))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
